# "Efficiency and water use: Dynamic effects of irrigation technology adoption"
# by Micah Cameron-Harp and Nathan Hendricks
# 
# Code written by Micah Cameron-Harp
# May 16th, 2024
# 
# This r script produces balanced dynamic treatment effect estimates for 
# both transitions during the period when the pre-test passes (1996-2005)
# using the Callaway and Sant'Anna estimator. 
# These are displayed in Tables D.4, D.5, and D.6 
pacman::p_load(
  did,
  tidyverse,
  openxlsx,
  dplyr)

# Parse arguments submitted by STATA
args = commandArgs(trailingOnly = "TRUE")
arg1 <- args[1]

#Define directories and set working directory
# NOTE - The root directory will be passed here from the main_text_results.do file 
dr_root <- paste0(arg1)
dr_data <- paste0(dr_root, "/data")
dr_output = paste0(dr_root, "/outputs")
dr_output_main = paste0(dr_root, "/outputs/main_text")
dr_output_app = paste0(dr_root, "/outputs/appendices")
dr_output_log = paste0(dr_root, "/outputs/logs")
dr_temp = paste0(dr_root, "/data/intermediate")
setwd(dr_temp)

########################################
#---------- Flood to Center Pivot or LEPA -----#
#PASSES FOR 1996 to 2005 years
wrg_flood_cplepa <- read.csv(file = file.path(dr_temp, "flood_cporlepa_prepped.csv"))

#Filter to the period when pre-trend passes
wrg_flood_cplepa <- wrg_flood_cplepa %>% dplyr::filter(wua_year >=1996 & wua_year <= 2005)

#Create a new dataframe to store the results in 
df_template <- data.frame(matrix(ncol=7,nrow=0, 
                                 dimnames=list(NULL, c("bal_e", "dep_var", "effect_estimated",
                                                       "estimate", "se_estimate", 
                                                       "c_crit_val", "panel_mean_dep_var"))))

#Create version of template to store the event study coefficients
dyn_df <- df_template

#List the dependent variables
effect_order <- c("af_used", "acres_irr", "depth_applied")

for (i in effect_order) {
  dep_var <- i
  #Make a df of just the dep var so we can get its mean value
  df_dep_var <- wrg_flood_cplepa %>% dplyr::select(all_of(dep_var))
  mean_dep_var <- mean(df_dep_var[,1], na.rm = TRUE)
  #Run att_gt Callaway and Sant'Anna using Not Yet Treated as Control Group
  sharp_floodcplepa_attgt_nyt <- att_gt(yname = dep_var,
                                        tname = "wua_year",
                                        idname = "WR_GROUP",
                                        gname = "cohort_flood_cplepa",
                                        data = wrg_flood_cplepa,
                                        panel = TRUE,
                                        allow_unbalanced_panel = TRUE,
                                        xformla=~hyd_cond+predev_sat+awc+sandtotal+silttotal+init_af_used,
                                        est_method = "dr",
                                        control_group = c("notyettreated"),
                                        anticipation=0,
                                        clustervars = "WR_GROUP", 
                                        biters=1000
  )
  #Look at BALANCED Dynamic effects
  for (bal_e in 1:10) {
    #Store the event study coefficients
    agg.es <- aggte(sharp_floodcplepa_attgt_nyt, type = "dynamic", na.rm = TRUE, min_e = -Inf, max_e = Inf, balance_e = bal_e)
    effect_order <- c(agg.es[["egt"]])
    est_lst <- c(agg.es[["att.egt"]])
    se_lst <- c(agg.es[["se.egt"]])
    e_c <- agg.es$crit.val.egt
    for (i in 1:length(effect_order)) {
      dyn_df[nrow(dyn_df)+1,] <- c(bal_e, dep_var, effect_order[i], est_lst[i], se_lst[i], e_c, mean_dep_var)
    }
  } #Inner loop through balanced effects period
} #Outer loop through dependant variables
#Create the 95% ci variables
dyn_df <- dyn_df %>%  mutate(ci_95_lb = as.numeric(estimate) - as.numeric(c_crit_val)*as.numeric(se_estimate),
                             ci_95_ub = as.numeric(estimate) + as.numeric(c_crit_val)*as.numeric(se_estimate),
                             transition = "flood to center pivot or LEPA")
dyn_df <- dyn_df %>% dplyr::filter(bal_e %in% c(2,4,6,8))
#Save as excel workbook
dataset_names <- list('cs_event' = dyn_df)
write.xlsx(dataset_names, file.path(dr_output_app, "/tableD4_D5_D6.xlsx"))

#Remove objects
rm(wrg_flood_cplepa, sharp_floodcplepa_attgt_nyt, dyn_df, att_df)
gc()

#----------------------------------------------# 
#---------- Center Pivot to LEPA --------------#
#----------------------------------------------# 
#Only 1996 to 2005
wrg_cp_lepa <- read.csv(file = file.path(dr_temp, "cp_lepa_prepped.csv"))
wrg_cp_lepa <- wrg_cp_lepa %>% dplyr::filter(wua_year >= 1996 & wua_year <= 2005)

#Create a new dataframe to store the results in 
df_template <- data.frame(matrix(ncol=7,nrow=0, 
                                 dimnames=list(NULL, c("bal_e", "dep_var", "effect_estimated",
                                                       "estimate", "se_estimate", 
                                                       "c_crit_val", "panel_mean_dep_var"))))

#Create version of template to store the event study coefficients
dyn_df <- df_template

#List the dependent variables
effect_order <- c("af_used", "acres_irr", "depth_applied")

for (i in effect_order) {
  dep_var <- i
  #Make a df of just the dep var so we can get its mean value
  df_dep_var <- wrg_cp_lepa %>% dplyr::select(all_of(dep_var))
  mean_dep_var <- mean(df_dep_var[,1], na.rm = TRUE)
  #Run att_gt Callaway and Sant'Anna using Not Yet Treated as Control Group
  sharp_cplepa_attgt_nyt <- att_gt(yname = dep_var,
                                   tname = "wua_year",
                                   idname = "WR_GROUP",
                                   gname = "cohort_cp_lepa",
                                   data = wrg_cp_lepa,
                                   panel = TRUE,
                                   allow_unbalanced_panel = TRUE,
                                   xformla=~hyd_cond+predev_sat+sandtotal+silttotal+init_af_used,
                                   est_method = "dr",
                                   control_group = c("notyettreated"),
                                   anticipation=0,
                                   clustervars = "WR_GROUP", 
                                   biters=1000
  )
  #Look at BALANCED Dynamic effects
  for (bal_e in 1:8) {
    #Store the event study coefficients
    agg.es <- aggte(sharp_cplepa_attgt_nyt, type = "dynamic", na.rm = TRUE, min_e = -Inf, max_e = Inf, balance_e = bal_e)
    effect_order <- c(agg.es[["egt"]])
    est_lst <- c(agg.es[["att.egt"]])
    se_lst <- c(agg.es[["se.egt"]])
    e_c <- agg.es$crit.val.egt
    for (i in 1:length(effect_order)) {
      dyn_df[nrow(dyn_df)+1,] <- c(bal_e, dep_var, effect_order[i], est_lst[i], se_lst[i], e_c, mean_dep_var)
    }
  } #Inner loop through balanced effects period
} #Outer loop through dependant variables
#Create the 95% ci variables
dyn_df <- dyn_df %>%  mutate(ci_95_lb = as.numeric(estimate) - as.numeric(c_crit_val)*as.numeric(se_estimate),
                             ci_95_ub = as.numeric(estimate) + as.numeric(c_crit_val)*as.numeric(se_estimate),
                             transition = "center pivot to LEPA")
#Save as excel workbook
dataset_names <- list('cs_event' = dyn_df)
write.xlsx(dataset_names, file.path(dr_output_app, "/tableD7_D8_D9.xlsx"))
